"""
Created on 2017年8月2日

@author: xiaoj
"""
from src.html_downloader import HtmlDownloader
from src.html_output import HtmlOutPut
from src.html_parser import HtmlParser
from src.url_manager import UrlManager


class CrawlerMain:
    """Coordinate a crawl: fetch pages, parse them, queue new URLs, collect data.

    The class owns one instance of each collaborator (URL manager,
    downloader, parser, output) and reuses them for every page.
    """

    def __init__(self):
        # Collaborators are project-local classes; only the methods called
        # in this class are relied upon.
        self.urlManager = UrlManager()      # add_url / add_urls / has_next / get_new_url
        self.downLoader = HtmlDownloader()  # download(url) -> page text
        self.parser = HtmlParser()          # parse(text, url) -> (new_urls, data)
        self.output = HtmlOutPut()          # add_data(data) / print_data()

    def crawler(self, url, max_pages=1000):
        """Crawl starting at *url*, then print everything that was collected.

        Args:
            url: Seed URL handed to the URL manager before the loop starts.
            max_pages: Upper bound on the number of pages attempted
                (successful or not). Defaults to 1000. A value <= 0 skips
                crawling entirely.

        A page that raises while being fetched or parsed is reported on
        stdout and skipped; the crawl continues with the next queued URL.
        ``self.output.print_data()`` is called once the loop ends.
        Exceptions raised by ``add_url``, ``has_next`` or ``print_data``
        are not caught here.
        """
        self.urlManager.add_url(url)
        page_no = 0
        # The limit is checked *before* fetching so that exactly
        # ``max_pages`` pages are attempted. Counting attempts (not just
        # successes) guarantees the loop terminates even if every page fails.
        while page_no < max_pages and self.urlManager.has_next():
            page_no += 1
            try:
                self._crawl_page()
            except Exception as e:
                # Best-effort crawl: report the failure and move on instead
                # of abandoning every URL that is still queued.
                print(e)
                print("download fail")
                continue
            # %d, not %f: the counter is an integer.
            print("第%d次任务" % page_no)
        self.output.print_data()

    def _crawl_page(self):
        """Fetch and parse the next queued URL, queue its links, keep its data."""
        new_url = self.urlManager.get_new_url()
        text = self.downLoader.download(new_url)
        new_urls, data = self.parser.parse(text, new_url)
        self.urlManager.add_urls(new_urls)
        # Only truthy data is stored.
        if data:
            self.output.add_data(data)

if __name__ == '__main__':
    # Script entry point: crawl starting from the Baidu Baike "Python" page.
    seed_url = "https://baike.baidu.com/item/Python"
    crawler_app = CrawlerMain()
    crawler_app.crawler(seed_url)
